load packages

library(tidyverse)
library(glmnet)
library(caTools)
library(caret)
library(ROCR)

load coded data

# Human-coded artifact ratings; joined later by subjectID/run/volume and
# supplies the `striping` and `intensity` codes used as ground truth.
coded = read.csv("~/Documents/code/dsnlab/automotion-test-set/tds_artifact_coded_volumes.csv")

load stripe detection data

# Directory and file-name pattern for the per-run stripe-detection CSVs.
fileDir = "~/Documents/code/dsnlab/automotion-test-set/output/"
filePattern = "tds_stripes_.*.csv"

# All stripe-detection output files to be merged below.
file_list = list.files(fileDir, pattern = filePattern)

# Read every stripe-detection CSV and stack them into one data frame.
# FIX: replaces the exists("stripes")/rbind growth pattern, which is O(n^2)
# and silently appends to any stale `stripes` object already in the
# workspace, and removes the duplicated read/rename/select logic.
stripes = file_list %>%
  lapply(function(f) {
    read.csv(paste0(fileDir, f)) %>%
      rename("volume" = t,            # detector's time index -> volume number
             "subjectID" = subject) %>%
      select(-file)                   # drop the file column from the CSV
  }) %>%
  bind_rows()

load global intensities and rps

# define paths and variables
rpDir = '~/Documents/code/dsnlab/automotion-test-set/rp_txt/'  # realignment-parameter text files
outputDir = '~/Documents/code/tds_auto-motion/auto-motion-output/'
plotDir = '~/Documents/code/tds_auto-motion/auto-motion-output/plots/'
study = "tds2"
# Capture groups pull subjectID (3 digits) and run name out of the file name.
rpPattern = "^rp_([0-9]{3})_(.*).txt"
# Column names assigned to each (header-less) rp file when read below.
rpCols = c("euclidian_trans","euclidian_rot","euclidian_trans_deriv","euclidian_rot_deriv","trash.rp")

# global intensities
# Read the per-volume global mean/SD intensity summaries, then correct a
# known off-by-one in the volume numbering for subject 157, run stop3
# (volumes after 43 are shifted down by one).
intensities = read.csv(paste0(outputDir, study, '_globalIntensities.csv')) %>%
  mutate(volume = ifelse(subjectID == 157 & run == "stop3" & volume > 43,
                         volume - 1, volume))

# rp files
file_list = list.files(rpDir, pattern = rpPattern)

# Read every realignment-parameter file and stack them into `rp`.
# FIXES: (1) rep(file, count(temp)) passed dplyr::count()'s data-frame result
# where an integer length was expected -- data.frame() recycles a length-1
# value, so no rep() is needed at all; (2) replaces the exists("rp")/rbind
# growth pattern (O(n^2), appends to stale workspace objects) with
# lapply() + bind_rows().
rp = file_list %>%
  lapply(function(f) {
    temp = read.table(paste0(rpDir, f))
    colnames(temp) = rpCols
    data.frame(temp, file = f) %>%
      mutate(volume = row_number()) %>%
      # parse subjectID and run out of the file name (rp_<ID>_<run>.txt)
      extract(file, c("subjectID","run"), rpPattern) %>%
      mutate(subjectID = as.integer(subjectID))
  }) %>%
  bind_rows()

join dataframes

# Attach the human codes, global intensities, and realignment parameters to
# the stripe-detection rows; volumes never hand-coded get 0 (no artifact).
joinKeys = c("subjectID", "run", "volume")
joined = stripes %>%
  left_join(coded, by = joinKeys) %>%
  left_join(intensities, by = joinKeys) %>%
  left_join(rp, by = joinKeys) %>%
  mutate(striping = replace(striping, is.na(striping), 0),
         intensity = replace(intensity, is.na(intensity), 0))

machine learning

use lasso logistic regression to fit beta weights for each predictor

# tidy data
# Reshape to one row per (subject, run, volume) with one column per frequency
# tile, and derive the binary criterion `artifact` (1 = striping or intensity
# rated above 1 by the human coder).
data = joined %>%
  mutate(tile = paste0("tile_",tile),
         artifact = ifelse(striping > 1 | intensity > 1, 1, 0)) %>%
  spread(tile, freqtile_power) %>%
  select(-striping, - intensity, -euclidian_rot_deriv, -trash.rp, -fsl.volume) %>%
  # NOTE: downstream code indexes columns by POSITION (train[,-c(1,2,3,4,5)]),
  # so this final ordering -- ids first, euclidian_trans_deriv 4th (kept for
  # plotting, excluded from predictors), artifact 5th -- must not change.
  select(subjectID, run, volume, euclidian_trans_deriv, artifact, everything())

# create training and test sets
# sample.split stratifies on `artifact` so train and test keep the same
# base rate. NOTE(review): `sample` shadows base::sample for the rest of
# the script -- consider renaming.
set.seed(101) 
sample = sample.split(data$artifact, SplitRatio = .75)
train = subset(data, sample == TRUE)
test  = subset(data, sample == FALSE)

# subset predictors and criterion
# Positional indexing: cols 1-3 are identifiers, col 4 is
# euclidian_trans_deriv (held out of the predictors; used later for
# plotting), col 5 is the criterion `artifact`; everything after is a
# predictor.
x_train = as.matrix(train[,-c(1,2,3,4,5)])
y_train = as.double(as.matrix(train[, 5]))

# run xval to determine lambda
# Cross-validated lasso (alpha = 1) logistic regression, selecting the
# penalty by AUC. NOTE(review): parallel=TRUE only takes effect if a foreach
# backend (e.g. doParallel) has been registered; none is registered in this
# script, so the fit presumably runs sequentially -- confirm.
cv.train <- cv.glmnet(x_train, y_train, family='binomial', alpha=1, parallel=TRUE, standardize=TRUE, type.measure='auc')

# cross-validation curve against log(lambda)
plot(cv.train)

# coefficient paths along the lambda sequence
plot(cv.train$glmnet.fit, xvar="lambda", label=TRUE)

# lambda minimizing CV error, and the largest lambda within 1 SE of it
# (the recorded values below come from a previous run)
cv.train$lambda.min
## [1] 0.0003247472
cv.train$lambda.1se
## [1] 0.001194543
# sparse coefficient vector at lambda.min ('.' = coefficient dropped)
coef(cv.train, s=cv.train$lambda.min)
## 16 x 1 sparse Matrix of class "dgCMatrix"
##                             1
## (Intercept)     -4.361452e+00
## volMean          .           
## volSD           -3.745048e-04
## euclidian_trans  1.042512e+00
## euclidian_rot    5.922844e-01
## tile_1          -3.392376e-01
## tile_10          1.253834e+04
## tile_11         -7.033361e+03
## tile_2           2.819671e+01
## tile_3           .           
## tile_4           .           
## tile_5           2.080045e+02
## tile_6          -1.345085e+03
## tile_7           .           
## tile_8           2.824277e+03
## tile_9          -7.034841e+03
# sparse coefficient vector at the more conservative lambda.1se
coef(cv.train, s=cv.train$lambda.1se)
## 16 x 1 sparse Matrix of class "dgCMatrix"
##                             1
## (Intercept)     -4.754184e+00
## volMean          .           
## volSD            .           
## euclidian_trans  1.098392e+00
## euclidian_rot    4.784833e-01
## tile_1          -1.991706e-02
## tile_10          9.822724e+03
## tile_11         -4.871957e+03
## tile_2           .           
## tile_3           .           
## tile_4          -1.491836e+01
## tile_5           .           
## tile_6          -4.916124e+02
## tile_7           .           
## tile_8           2.018037e+01
## tile_9          -3.738765e+03
# test on sample
# In-sample predicted probabilities at the conservative lambda.1se penalty.
pred_train = predict(cv.train, newx = x_train, s=cv.train$lambda.1se, type="response")

# plot cutoff v. accuracy
# ROCR stores each curve as a one-element list (one per prediction run),
# hence the [[1]] extractions below; for "acc", x.values holds the
# probability cutoffs and y.values the accuracy at each cutoff.
predicted = prediction(pred_train, y_train, label.ordering = NULL)
perf = performance(predicted, measure = "acc")
perf.df = data.frame(cut=perf@x.values[[1]],acc=perf@y.values[[1]])

ggplot(perf.df, aes(cut, acc)) +
  geom_line()

# plot false v. true positive rate
perf = performance(predicted, measure = "tpr", x.measure = "fpr")
perf.df = data.frame(cut=perf@alpha.values[[1]],fpr=perf@x.values[[1]],tpr=perf@y.values[[1]])

# ROC curve
ggplot(perf.df, aes(fpr, tpr)) +
  geom_line()

# plot specificity v. sensitivity
perf = performance(predicted, measure = "sens", x.measure = "spec")
# BUG FIX: in a ROCR performance object, x.values holds the x.measure
# ("spec") and y.values the measure ("sens"); the original assignment had
# the two column labels swapped, so both plots below showed specificity
# labeled as sensitivity and vice versa.
perf.df = data.frame(cut=perf@alpha.values[[1]],spec=perf@x.values[[1]],sens=perf@y.values[[1]])
ggplot(perf.df, aes(spec, sens)) +
  geom_line()

# sensitivity and specificity as a function of the probability cutoff
ggplot(perf.df, aes(x = cut)) +
  geom_line(aes(y = sens, color = "sensitivity")) + 
  geom_line(aes(y = spec, color = "specificity"))

# Youden-style summary: the cutoff maximizing sensitivity + specificity.
cut = perf@alpha.values[[1]][which.max(perf@x.values[[1]]+perf@y.values[[1]])]
ss = max(perf@x.values[[1]]+perf@y.values[[1]]) # sensitivity + specificity

# confusion matrix
# Binarize the in-sample probabilities at the .2 cutoff read off the
# sensitivity/specificity curves above. FIX: the original pair of
# comparisons (> .2 then < .2) left any probability exactly equal to .2
# unclassified; the first comparison now uses >=.
pred_train = predict(cv.train, newx = x_train, s=cv.train$lambda.1se, type="response")
pred_train[pred_train >= .2] = 1
pred_train[pred_train < .2] = 0
# NOTE(review): recent caret versions require factor inputs to
# confusionMatrix; wrap both arguments in factor(..., levels = c(0, 1))
# if this call errors.
confusionMatrix(pred_train, y_train)
## (recorded output from a previous run)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction    0    1
##          0 5613   66
##          1   27   70
##                                          
##                Accuracy : 0.9839         
##                  95% CI : (0.9803, 0.987)
##     No Information Rate : 0.9765         
##     P-Value [Acc > NIR] : 4.998e-05      
##                                          
##                   Kappa : 0.5929         
##  Mcnemar's Test P-Value : 8.134e-05      
##                                          
##             Sensitivity : 0.9952         
##             Specificity : 0.5147         
##          Pos Pred Value : 0.9884         
##          Neg Pred Value : 0.7216         
##              Prevalence : 0.9765         
##          Detection Rate : 0.9718         
##    Detection Prevalence : 0.9832         
##       Balanced Accuracy : 0.7550         
##                                          
##        'Positive' Class : 0              
## 
######### test on holdout sample
# subset predictors and criterion
# Same positional layout as the training data: cols 1-4 are identifiers
# plus euclidian_trans_deriv, col 5 is the criterion `artifact`.
x_test = as.matrix(test[,-c(1,2,3,4,5)])
y_test = as.double(as.matrix(test[, 5]))

# test on holdout sample
# FIX: use >= so probabilities exactly at the .2 cutoff are classified
# (the original > / < pair left them untouched).
pred_test = predict(cv.train, newx = x_test, s=cv.train$lambda.1se, type="response")
pred_test[pred_test >= .2] = 1
pred_test[pred_test < .2] = 0

# confusion matrix
# NOTE(review): recent caret versions require factor inputs here.
confusionMatrix(pred_test, y_test)
## (recorded output from a previous run)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction    0    1
##          0 1876   23
##          1    4   23
##                                           
##                Accuracy : 0.986           
##                  95% CI : (0.9797, 0.9907)
##     No Information Rate : 0.9761          
##     P-Value [Acc > NIR] : 0.001563        
##                                           
##                   Kappa : 0.6235          
##  Mcnemar's Test P-Value : 0.000532        
##                                           
##             Sensitivity : 0.9979          
##             Specificity : 0.5000          
##          Pos Pred Value : 0.9879          
##          Neg Pred Value : 0.8519          
##              Prevalence : 0.9761          
##          Detection Rate : 0.9740          
##    Detection Prevalence : 0.9860          
##       Balanced Accuracy : 0.7489          
##                                           
##        'Positive' Class : 0               
## 
######### logistic regression
## note: couldn't get predict() to produce test-set values -- predict.glm takes a 'newdata' argument, not 'newx'; passing 'newx' is silently ignored, so the training-set fitted values are returned instead
# log = glm(artifact ~ volMean + volSD + euclidian_trans + euclidian_rot + tile_1 + tile_2 + tile_3 + tile_4 + tile_5 + tile_6 + tile_7 + tile_8 + tile_9 + tile_10 + tile_11, family='binomial', data=train)
# 
# pred = predict(log, newx = train, type="response")
# predicted = prediction(pred, y_train, label.ordering = NULL)
# perf = performance(predicted, measure = "sens", x.measure = "spec")
# perf.df = data.frame(cut=perf@alpha.values[[1]],sens=perf@x.values[[1]],spec=perf@y.values[[1]])
# ggplot(perf.df, aes(spec, sens)) +
#   geom_line()
# 
# ggplot(perf.df, aes(x = cut)) +
#   geom_line(aes(y = sens, color = "sensitivity")) + 
#   geom_line(aes(y = spec, color = "specificity")) +
#   geom_vline(xintercept = .03)
# 
# pred = predict(log, newx = x_test, type="response")
# pred[pred > .03] = 1
# pred[pred < .03] = 0
# confusionMatrix(pred, y_test)

original auto-motion

make composite of all indicators

# Composite artifact detector: combine intensity-difference, realignment-
# parameter, and striping indicators into a single trash.combined flag,
# then label hits / misses / false alarms against the human codes.
comp = joined %>%
  filter(tile == 1 | tile == 10) %>%
  
  # code trash based on mean, sd, and rp 
  # volume-to-volume change in global mean/SD, within subject x run x tile
  group_by(subjectID, run, tile) %>%
  mutate(Diff.mean = volMean - lag(volMean),
         Diff.sd = volSD - lag(volSD)) %>%
  ungroup %>%
  # thresholds below are computed over the whole ungrouped sample (single
  # mean/SD for all subjects and runs), not per run
  mutate(meanDiff.mean = mean(Diff.mean, na.rm=TRUE),
         sdDiff.mean = sd(Diff.mean, na.rm=TRUE),
         meanDiff.sd = mean(Diff.sd, na.rm=TRUE),
         sdDiff.sd = sd(Diff.sd, na.rm=TRUE),
         
         # code volumes above mean thresholds as trash
         upper.mean = meanDiff.mean + 2*sdDiff.mean,
         lower.mean = meanDiff.mean - 2*sdDiff.mean,
         trash.mean = ifelse(Diff.mean > upper.mean | Diff.mean < lower.mean, 1, 0),
         
         upper.sd = meanDiff.sd + 2*sdDiff.sd,
         lower.sd = meanDiff.sd - 2*sdDiff.sd,
         trash.sd = ifelse(Diff.sd > upper.sd | Diff.sd < lower.sd, 1, 0),
         
         # code volumes with more than +/- .25mm translation or rotation in Euclidian distance
         # (falls back to the upstream trash.rp flag when within threshold)
         trash.rp.tr = ifelse(euclidian_trans_deriv > .25 | euclidian_trans_deriv < -.25, 1, trash.rp),
         trash.rp.rot = ifelse(euclidian_rot_deriv > .25 | euclidian_rot_deriv < -.25, 1, trash.rp)) %>%
  select(-meanDiff.mean, -meanDiff.sd, -sdDiff.mean, -sdDiff.sd) %>%
  
  # code trash based on striping
  # center each tile's freqtile power within subject x run before thresholding
  group_by(subjectID, run, tile) %>%
  mutate(freqtile_power_c = freqtile_power - mean(freqtile_power, na.rm=TRUE)) %>%
  ungroup() %>%
  select(-freqtile_power) %>%
  spread(tile,freqtile_power_c) %>%
  mutate(trash.stripe = ifelse(`1` < -.035 & `10` > .00025, 1, 0)) %>%
  
  # combine trash
  # striping alone flags a volume; otherwise at least TWO of the four other
  # indicators must agree (their sum must exceed 1)
  mutate(trash.combined = ifelse(trash.stripe == 1, 1, 0),
         trash.combined = ifelse((trash.rp.tr + trash.rp.rot + trash.mean + trash.sd) > 1, 1, trash.combined)) %>%
         #trash.combined = ifelse(lead(trash.combined) == TRUE, TRUE, trash.combined)) %>%
  
  # recode as trash if volume behind and in front are both marked as trash
  # NOTE(review): the frame is ungrouped here, so lag/lead can cross
  # subject/run boundaries -- confirm this is intended
  mutate(trash.combined = ifelse(trash.combined == 0 & lag(trash.combined) == 1 & lead(trash.combined) == 1, 1, trash.combined)) %>%
         
  # code first volume as trash if second volume is trash
  mutate(trash.combined = ifelse(volume == 1 & lead(trash.combined) == 1, 1, trash.combined)) %>%

  # code hits
  # hit = flagged & clearly coded; neg = miss; pos = false alarm; NA = correct rejection
  mutate(hits = ifelse(trash.combined == 1 & (striping == 2 | intensity == 2), "hit",
                #ifelse(trash.combined == 1 & (striping == 1 | intensity == 1), "hit.light",
                ifelse(trash.combined == 0 & (striping == 2 | intensity == 2), "neg",
                #ifelse(trash.combined == 0 & (striping == 1 | intensity == 1), "neg.light",
                ifelse(trash.combined == 1 & (striping == 0 | intensity == 0), "pos", NA))),
         # regexpr('.*', hits) always matches when hits is non-NA, so this
         # effectively labels only volumes with a non-NA hits value
         # (NA hits propagate an NA label, not '')
         label = ifelse(regexpr('.*', hits), as.character(volume), ''),
         hits = as.factor(hits)) %>%
  gather(tile, freqtile_power_c, c(`1`, `10`)) %>%
  mutate(tile = as.numeric(tile))

compare hit rates

include light trials

training data

# select only one set of observations and code any artifact as 1
# (each volume appears once per tile, so tile == 1 keeps one row per volume;
# hits.tot folds the ".light" variants into their parent category and codes
# unflagged, uncoded volumes as correct rejections)
comp.tab = comp %>% 
  filter(tile == 1) %>%
  mutate(artifact = ifelse(striping > 0 | intensity > 0, 1, 0),
         hits.tot = case_when(hits %in% c("hit", "hit.light") ~ "hit",
                              hits %in% c("neg", "neg.light") ~ "neg",
                              hits %in% c("pos") ~ "pos",
                              is.na(hits) ~ "cor.rej"))

# separate into training and test runs
# BUG FIX: comp.train/comp.test previously subset on `sample` -- the split
# vector computed earlier for the machine-learning data frame -- instead of
# the `comp.sample` vector computed here, so the composite's train/test
# split was taken from the wrong (and potentially misaligned) vector.
set.seed(101) 
comp.sample = sample.split(comp.tab$artifact, SplitRatio = .75)
comp.train = subset(comp.tab, comp.sample == TRUE)
comp.test  = subset(comp.tab, comp.sample == FALSE)

# machine learning
# Lasso-classifier performance on the training set (recorded output below).
confusionMatrix(pred_train, y_train)$table
##           Reference
## Prediction    0    1
##          0 5613   66
##          1   27   70
confusionMatrix(pred_train, y_train)$overall[1]
##  Accuracy 
## 0.9838989
confusionMatrix(pred_train, y_train)$byClass[11]
## Balanced Accuracy 
##         0.7549593
# manual 
# Outcome counts for the hand-tuned composite detector on the same split.
table(comp.train$hits.tot)
## 
## cor.rej     hit     neg     pos 
##    5599     101      33      43
#table(comp.train$hits)

# NOTE(review): positional [[i]] indexing assumes all four categories are
# present and alphabetically ordered (cor.rej, hit, neg, pos) -- fragile if
# any category is empty; indexing by name would be safer.
cor.rej = table(comp.train$hits.tot)[[1]]
hit = table(comp.train$hits.tot)[[2]]
neg = table(comp.train$hits.tot)[[3]]
pos = table(comp.train$hits.tot)[[4]]
total = cor.rej + hit + neg + pos

# overall accuracy: (correct rejections + hits) / total volumes
sprintf('Accuracy: %s', round((cor.rej + hit) / total,2))
## [1] "Accuracy: 0.99"
# balanced accuracy
# mean of the specificity-like and sensitivity-like rates
sprintf('Balanced accuracy: %s', round(((cor.rej / (cor.rej + pos)) + (hit / (hit + neg))) / 2,2))
## [1] "Balanced accuracy: 0.87"

test data

# machine learning
# Lasso-classifier performance on the holdout set (recorded output below).
confusionMatrix(pred_test, y_test)$table
##           Reference
## Prediction    0    1
##          0 1876   23
##          1    4   23
confusionMatrix(pred_test, y_test)$overall[1]
##  Accuracy 
## 0.9859813
confusionMatrix(pred_test, y_test)$byClass[11]
## Balanced Accuracy 
##         0.7489362
# manual 
# Outcome counts for the hand-tuned composite detector on the holdout split.
table(comp.test$hits.tot)
## 
## cor.rej     hit     neg     pos 
##    1868      33      12      13
#table(comp.test$hits)

# NOTE(review): positional [[i]] indexing assumes all four categories are
# present -- fragile if any category is empty in the holdout split.
cor.rej = table(comp.test$hits.tot)[[1]]
hit = table(comp.test$hits.tot)[[2]]
neg = table(comp.test$hits.tot)[[3]]
pos = table(comp.test$hits.tot)[[4]]
total = cor.rej + hit + neg + pos

# overall accuracy: (correct rejections + hits) / total volumes
sprintf('Accuracy: %s', round((cor.rej + hit) / total,2))
## [1] "Accuracy: 0.99"
# balanced accuracy
sprintf('Balanced accuracy: %s', round(((cor.rej / (cor.rej + pos)) + (hit / (hit + neg))) / 2,2))
## [1] "Balanced accuracy: 0.86"

plot composite

y = translation derivative

# Reference lines at the fixed +/- .25 translation-derivative threshold, one
# pair per subject x run facet.
thresholds = comp %>% 
  filter(tile == 1) %>%
  distinct(subjectID, run) %>% 
  mutate(upper = .25,
         lower = -.25)

# Translation derivative per volume, with composite-detector outcomes
# (hit / neg / pos) overlaid and flagged volumes labeled by number.
ggplot(filter(comp, tile == 1), aes(x = volume, y = euclidian_trans_deriv)) +
  geom_line(size = .25) +
  geom_point(data = filter(comp, !is.na(hits)), aes(color = hits), size = 2.5) +
  geom_text(aes(label = label), size = 1.5) +
  facet_wrap(~ subjectID + run, scales = "free", ncol = 4) +
  scale_color_manual(values = c("#3B9AB2", "#EBCC2A", "#F21A00")) +
  #scale_color_manual(values = c("#3B9AB2","#78B7C5","#9EBE91","#EBCC2A","#F21A00")) +
  geom_hline(data = thresholds, aes(yintercept = upper), color = "#F21A00") +
  geom_hline(data = thresholds, aes(yintercept = lower), color = "#F21A00") +
  theme(axis.text.x = element_text(size = 6))

y = mean intensity

# Reference lines at the +/- 2 SD mean-intensity-difference thresholds
# carried through from the composite pipeline (computed sample-wide there,
# so constant across facets).
thresholds = comp %>% 
  filter(tile == 1) %>%
  distinct(subjectID, run, upper.mean, lower.mean) %>%
  mutate(upper = upper.mean,
         lower = lower.mean)

# Volume-to-volume change in global mean intensity, with detector outcomes
# overlaid and flagged volumes labeled by number.
ggplot(filter(comp, tile == 1), aes(x = volume, y = Diff.mean)) +
  geom_line(size = .25) +
  geom_point(data = filter(comp, !is.na(hits)), aes(color = hits), size = 2.5) +
  geom_text(aes(label = label), size = 1.5) +
  facet_wrap(~ subjectID + run, scales = "free", ncol = 4) +
  scale_color_manual(values = c("#3B9AB2", "#EBCC2A", "#F21A00")) +
  #scale_color_manual(values = c("#3B9AB2","#78B7C5","#9EBE91","#EBCC2A","#F21A00")) +
  geom_hline(data = thresholds, aes(yintercept = upper), color = "#F21A00") +
  geom_hline(data = thresholds, aes(yintercept = lower), color = "#F21A00") +
  theme(axis.text.x = element_text(size = 6))

y = freqtile power

# Striping thresholds used by the composite: tile 1 cutoff at -.035,
# tile 10 at .00025.
thresholds = comp %>% 
  distinct(subjectID, run, tile) %>% 
  mutate(y = ifelse(tile == 1, -.035, .00025))

# Centered freqtile power per volume, faceted by subject x run x tile,
# with detector outcomes overlaid.
ggplot(comp, aes(x = volume, y = freqtile_power_c)) +
  geom_line(size = .25) +
  geom_point(data = filter(comp, !is.na(hits)), aes(color = hits), size = 2.5) +
  geom_text(aes(label = label), size = 1.5) +
  facet_wrap(~ subjectID + run + tile, scales = "free", ncol = 4) +
  scale_color_manual(values = c("#3B9AB2", "#EBCC2A", "#F21A00")) +
  #scale_color_manual(values = c("#3B9AB2","#78B7C5","#9EBE91","#EBCC2A","#F21A00")) +
  geom_hline(data = thresholds, aes(yintercept = y), color = "#F21A00") +
  theme(axis.text.x = element_text(size = 6))

plot logistic regression

y = translation derivative

# re-run lasso on full sample
# Score every volume (train + test) with the fitted lasso model and binarize
# at a .3 cutoff for plotting. FIX: >= so probabilities exactly at the
# cutoff are classified (the original > / < pair left them untouched).
# NOTE(review): this cutoff (.3) differs from the .2 used for the confusion
# matrices above -- confirm which is intended.
x = as.matrix(data[,-c(1,2,3,4,5)])
y = as.double(as.matrix(data[, 5]))
pred = predict(cv.train, newx = x, s=cv.train$lambda.1se, type="response")
pred[pred >= .3] = 1
pred[pred < .3] = 0

# Cross the human code (y) with the prediction (column `1` of pred) into
# hit / false alarm ("pos") / miss ("neg"); correct rejections stay NA.
# FIX: label non-hit volumes with '' instead of the NA produced by the old
# regexpr('.*', hits) test, so geom_text draws empty labels rather than
# dropping those rows.
data.plot = bind_cols(data, as.data.frame(y), as.data.frame(pred)) %>%
  mutate(hits = ifelse(y == 1 & `1` == 1, "hit",
                ifelse(y == 0 & `1` == 1, "pos",
                ifelse(y == 1 & `1` == 0, "neg", NA))),
         label = ifelse(is.na(hits), '', as.character(volume)),
         hits = as.factor(hits))
  
# Translation derivative per volume with lasso-classifier outcomes overlaid.
ggplot(data.plot, aes(x = volume, y = euclidian_trans_deriv)) +
  geom_line(size = .25) +
  geom_point(data = filter(data.plot, !is.na(hits)), aes(color = hits), size = 2.5) +
  geom_text(aes(label = label), size = 1.5) +
  facet_wrap(~ subjectID + run, scales = "free", ncol = 4) +
  scale_color_manual(values = c("#3B9AB2", "#EBCC2A", "#F21A00")) +
  theme(axis.text.x = element_text(size = 6))

indicators separately

apply intensity and rp thresholds

# auto = joined %>%
#   group_by(subjectID, run) %>%
#   mutate(Diff.mean = volMean - lag(volMean),
#          Diff.sd = volSD - lag(volSD)) %>%
#   filter(tile == 1 | tile == 10) %>%
#   ungroup %>%
#   mutate(meanDiff.mean = mean(Diff.mean, na.rm=TRUE),
#          sdDiff.mean = sd(Diff.mean, na.rm=TRUE),
#          meanDiff.sd = mean(Diff.sd, na.rm=TRUE),
#          sdDiff.sd = sd(Diff.sd, na.rm=TRUE),
#          
#          # code volumes above mean thresholds as trash
#          trash.mean = ifelse(Diff.mean > (meanDiff.mean + 3*sdDiff.mean) | Diff.mean < (meanDiff.mean - 1.5*sdDiff.mean), 1, 0),
#          trash.sd = ifelse(Diff.sd > (meanDiff.sd + 3*sdDiff.sd) | Diff.sd < (meanDiff.sd - 3*sdDiff.sd), 1, 0),
#          
#          # code volumes with more than +/- .3mm translation in Euclidian distance
#          trash.rp = ifelse(euclidian_trans_deriv > .3 | euclidian_trans_deriv < -.3, 1, trash.rp),
#          # code volumes with more than +/- .3 rotation in Euclidian distance
#          trash.rp = ifelse(euclidian_rot_deriv > .3 | euclidian_rot_deriv < -.3, 1, trash.rp),
#          
#          # recode as trash if volume behind and in front are both marked as trash
#          trash.mean = ifelse(trash.mean == 0 & lag(trash.mean) == 1 & lead(trash.mean) == 1, 1, trash.mean),
#          trash.sd = ifelse(trash.sd == 0 & lag(trash.sd) == 1 & lead(trash.sd) == 1, 1, trash.sd),
#          trash.rp = ifelse(trash.rp == 0 & lag(trash.rp) == 1 & lead(trash.rp) == 1, 1, trash.rp),
#          
#          # code first volume as trash if second volume is trash
#          trash.mean = ifelse(volume == 1 & lead(trash.mean) == 1, 1, trash.mean),
#          trash.sd = ifelse(volume == 1 & lead(trash.sd) == 1, 1, trash.sd),
#          trash.rp = ifelse(volume == 1 & lead(trash.rp) == 1, 1, trash.rp)) %>%
#   
#   mutate(trash.mean = ifelse(is.na(trash.mean), 0, trash.mean),
#          trash.sd = ifelse(is.na(trash.sd), 0, trash.sd),
#          hits.rp = ifelse(trash.rp == 1 & (striping == 2 | intensity == 2), "hit",
#                 ifelse(trash.rp == 1 & (striping == 1 | intensity == 1), "hit.light",
#                 ifelse(trash.rp == 1 & (striping == 0 | intensity == 0), "pos",
#                 ifelse(trash.rp == 0 & (striping == 2 | intensity == 2), "neg",
#                 ifelse(trash.rp == 0 & (striping == 1 | intensity == 1), "neg.light", NA))))),
#          hits.mean = ifelse(trash.mean == 1 & (striping == 2 | intensity == 2), "hit",
#                 ifelse(trash.mean == 1 & (striping == 1 | intensity == 1), "hit.light",
#                 ifelse(trash.mean == 1 & (striping == 0 | intensity == 0), "pos",
#                 ifelse(trash.mean == 0 & (striping == 2 | intensity == 2), "neg",
#                 ifelse(trash.mean == 0 & (striping == 1 | intensity == 1), "neg.light", NA))))),
#          hits.sd = ifelse(trash.sd == 1 & (striping == 2 | intensity == 2), "hit",
#                 ifelse(trash.sd == 1 & (striping == 1 | intensity == 1), "hit.light",
#                 ifelse(trash.sd == 1 & (striping == 0 | intensity == 0), "pos",
#                 ifelse(trash.sd == 0 & (striping == 2 | intensity == 2), "neg",
#                 ifelse(trash.sd == 0 & (striping == 1 | intensity == 1), "neg.light", NA))))),
#          label = ifelse(regexpr(".*", hits.rp) | regexpr(".*", hits.mean) | regexpr(".*", hits.sd), as.character(volume), '')) %>%
#   select(subjectID, run, volume, Diff.mean, Diff.sd, volMean, volSD, starts_with("euclidian"), hits.mean, hits.sd, hits.rp, tile, label)

apply absolute thresholds and plot

# test = joined %>%
#   group_by(subjectID, run, tile) %>%
#   mutate(freqtile_power_c = freqtile_power - mean(freqtile_power)) %>%
#   ungroup() %>%
#   filter(tile == 1 | tile == 10) %>%
#   select(-freqtile_power) %>%
#   spread(tile,freqtile_power_c) %>%
#   mutate(red_zone = ifelse(`1` < -.03 & `10` > .0002, TRUE, FALSE),
#          red_zone = ifelse(lead(red_zone) == TRUE, TRUE, red_zone),
#          hits = ifelse(red_zone == TRUE & (striping == 2 | intensity == 2), "hit",
#                 ifelse(red_zone == TRUE & (striping == 1 | intensity == 1), "hit.light",
#                 ifelse(red_zone == TRUE & (striping == 0 | intensity == 0), "pos",
#                 ifelse(red_zone == FALSE & (striping == 2 | intensity == 2), "neg",
#                 ifelse(red_zone == FALSE & (striping == 1 | intensity == 1), "neg.light", NA))))),
#          label = ifelse(hits, as.character(volume), ''),
#          hits = as.factor(hits)) %>%
#   gather(tile, freqtile_power_c, c(`1`, `10`)) %>%
#   mutate(tile = as.numeric(tile))

apply threshold using SDs and plot

# test_diff = joined %>%
#   group_by(subjectID, run, tile) %>%
#   mutate(freqtile_power_c = freqtile_power - mean(freqtile_power),
#          diff = freqtile_power - lag(freqtile_power)) %>%
#   select(-freqtile_power, -freqtile_power_c) %>%
#   filter(tile == 1 | tile == 10) %>%
#   spread(tile,diff) %>%
#   ungroup() %>%
#   mutate(upper_1 = (mean(`1`,na.rm=TRUE) + 1.25*sd(`1`, na.rm=TRUE)),
#          lower_1 = (mean(`1`,na.rm=TRUE) - 1.25*sd(`1`, na.rm=TRUE)),
#          upper_10 = (mean(`10`,na.rm=TRUE) + 1.25*sd(`10`, na.rm=TRUE)),
#          lower_10 = (mean(`10`,na.rm=TRUE) - 1.25*sd(`10`, na.rm=TRUE))) %>%
#   mutate(red_zone_1 = ifelse(`1` > upper_1 | `1` < lower_1, TRUE, FALSE),
#          red_zone_10 = ifelse(`10` > upper_10 | `10` < lower_10, TRUE, FALSE),
#          red_zone = ifelse(red_zone_1 == TRUE & red_zone_10 == TRUE, TRUE, FALSE),
#          red_zone = ifelse(lead(red_zone) == TRUE, TRUE, red_zone),
#          red_zone = ifelse(is.na(red_zone), 0, red_zone),
#          hits = ifelse(red_zone == TRUE & (striping == 2 | intensity == 2), "hit",
#                 ifelse(red_zone == TRUE & (striping == 1 | intensity == 1), "hit.light",
#                 ifelse(red_zone == TRUE & (striping == 0 | intensity == 0), "pos",
#                 ifelse(red_zone == FALSE & (striping == 2 | intensity == 2), "neg",
#                 ifelse(red_zone == FALSE & (striping == 1 | intensity == 1), "neg.light", NA))))),
#          label = ifelse(hits, as.character(volume), ''),
#          hits = as.factor(hits)) %>%
#   gather(tile, diff, c(`1`, `10`)) %>%
#   mutate(tile = as.numeric(tile))

plot

mean intensities

# ggplot(filter(auto, tile == 1), aes(x = volume, y = volMean)) +
#   geom_line(size = .25) +
#   geom_point(data = subset(auto, !is.na(hits.mean)), aes(color = hits.mean), size = 2.5) +
#   geom_text(aes(label = label), size = 2, position = position_nudge(x = 1.5, y = .000025)) +
#   facet_wrap(~ subjectID + run, ncol = 4, scales = "free") +
#   scale_color_manual(values = c("#3B9AB2","#78B7C5","#9EBE91","#EBCC2A","#F21A00")) +
#   #geom_hline(data = thresholds, aes(yintercept = upper), color = "#F21A00") +
#   #geom_hline(data = thresholds, aes(yintercept = lower), color = "#F21A00") +
#   theme(axis.text.x = element_text(size = 6))

sd intensities

# ggplot(filter(auto, tile == 1), aes(x = volume, y = volSD)) +
#   geom_line(size = .25) +
#   geom_point(data = subset(auto, !is.na(hits.sd)), aes(color = hits.sd), size = 2.5) +
#   geom_text(aes(label = label), size = 2, position = position_nudge(x = 1.5, y = .000025)) +
#   facet_wrap(~ subjectID + run, ncol = 4, scales = "free") +
#   scale_color_manual(values = c("#3B9AB2","#78B7C5","#9EBE91","#EBCC2A","#F21A00")) +
#   #geom_hline(data = thresholds, aes(yintercept = upper), color = "#F21A00") +
#   #geom_hline(data = thresholds, aes(yintercept = lower), color = "#F21A00") +
#   theme(axis.text.x = element_text(size = 6))

translation

# ggplot(filter(auto, tile == 1), aes(x = volume, y = euclidian_trans_deriv)) +
#   geom_line(size = .25) +
#   geom_point(data = subset(auto, !is.na(hits.rp)), aes(color = hits.rp), size = 2.5) +
#   geom_text(aes(label = label), size = 2, position = position_nudge(x = 1.5, y = .000025)) +
#   facet_wrap(~ subjectID + run, ncol = 4, scales = "free") +
#   scale_color_manual(values = c("#3B9AB2","#78B7C5","#9EBE91","#EBCC2A","#F21A00")) +
#   #geom_hline(data = thresholds, aes(yintercept = upper), color = "#F21A00") +
#   #geom_hline(data = thresholds, aes(yintercept = lower), color = "#F21A00") +
#   theme(axis.text.x = element_text(size = 6))

freqtile power

# thresholds = test %>% select(subjectID, run, tile) %>% unique(.) %>% mutate(y = ifelse(tile == 1, -.03, .0002))
# 
# ggplot(test, aes(x = volume, y = freqtile_power_c)) +
#   geom_line(size = .25) +
#   geom_point(data = subset(test, !is.na(hits)), aes(color = hits), size = 2.5) +
#   geom_text(aes(label = label), size = 2, position = position_nudge(x = 1.5, y = .000025)) +
#   facet_wrap(~subjectID + run + tile, scales = "free", ncol = 4) +
#   scale_color_manual(values = c("#3B9AB2","#78B7C5","#9EBE91","#EBCC2A","#F21A00")) +
#   geom_hline(data = thresholds, aes(yintercept = y), color = "#F21A00") +
#   theme(axis.text.x = element_text(size = 6))

freqtile SD

# thresholds = test_diff %>% 
#   select(subjectID, run, tile, diff) %>% 
#   group_by(tile) %>%
#   mutate(upper = mean(diff, na.rm=TRUE) + 1*sd(diff, na.rm=TRUE),
#          lower = mean(diff, na.rm=TRUE) - 1*sd(diff, na.rm=TRUE)) %>%
#   select(-diff) %>%
#   unique(.)
# 
# ggplot(test_diff, aes(x = volume, y = diff)) +
#   geom_line(size = .25) +
#   geom_point(data = subset(test_diff, !is.na(hits)), aes(color = hits), size = 2.5) +
#   geom_text(aes(label = label), size = 2, position = position_nudge(x = 1.5, y = .000025)) +
#   facet_wrap(~ subjectID + run + tile, ncol = 4, scales = "free") +
#   scale_color_manual(values = c("#3B9AB2","#78B7C5","#9EBE91","#EBCC2A","#F21A00")) +
#   geom_hline(data = thresholds, aes(yintercept = upper), color = "#F21A00") +
#   geom_hline(data = thresholds, aes(yintercept = lower), color = "#F21A00") +
#   theme(axis.text.x = element_text(size = 6))